/* ******************************************************************************************************* */
/* ********************************* Construct mutual fund holdings dataset ********************************/
/* ******************************************************************************************************* */

*** Consolidate S12 holdings;
data mflink2; set mflinks.mflink2;
  if wficn ne .;

proc sql;
  create table s12_holdings as
  select a.*, b.wficn
  from s12.holdings as a, mflink2 as b
  where a.fundno = b.fundno and a.rdate = b.rdate and a.fdate = b.fdate and year(a.rdate) >= 2002;
quit;

proc sort; by wficn rdate fdate permno cfacshr_rdate;

proc means noprint; by wficn rdate fdate permno cfacshr_rdate;
  var market_value shares_adj wt_adj;
  output out = s12_holdings (drop=_type_ _freq_) sum = market_value shares_adj wt_adj;

proc means noprint; by wficn rdate fdate;
  var market_value;
  output out = s12_sum (drop=_type_ _freq_) sum = port_value;

data temp.s12_holdings; merge s12_holdings s12_sum; by wficn rdate fdate;
  pwt = market_value/port_value;
run;


********************************************** CRSP Mutual Fund holdings ******************************************************;
**** Retrieve observations in CRSPMF.holdings with missing permno and compute the median stock price at the report date;
proc sql; 
  create table crspmf_holdings as
  select a.*, b.security_name, b.permno, b.cusip, b.permco, b.ticker
  from crspmf.holdings as a left join crspmf.holdings_co_info as b
  on a.crsp_company_key = b.crsp_company_key;
quit;

data missing_permno; set crspmf_holdings;
  if permno = .;
  prc = abs(market_val/nbr_shares);

proc sort; by crsp_company_key cusip ticker security_name report_dt;

proc means data=missing_permno noprint; by crsp_company_key cusip ticker security_name report_dt;
  var prc;
  output out = missing_permno (drop=_type_ _freq_) median = prc_p50 p25 = pcr_p25 p75 = pcr_p75 n = num_funds;
run;

**** Match by 6-digit CUSIP AND ticker;
proc sort data=crsp.stocknames(keep=permco permno ncusip cusip ticker comnam namedt nameenddt st_date end_date shrcd exchcd where=(not missing(ncusip))) 
  out=cusips nodupkey; by permco permno ncusip;
run;

proc sql;
  create table missing_permno1 as
  select a.*, b.permno, b.permco, b.comnam, b.namedt, b.nameenddt
  from missing_permno as a left join cusips as b
  on substr(a.cusip, 1, 6) = substr(b.cusip, 1, 6) and a.ticker = b.ticker and b.namedt <= a.report_dt <= nameenddt;
quit;

data missing_permno1a; set missing_permno1; 
  if namedt ne .;

proc sort; by crsp_company_key cusip ticker security_name report_dt permno ;

data missing_permno1a; set missing_permno1a; by crsp_company_key cusip ticker security_name report_dt permno;
  if last.permno;

**** Check if the stock price on the report date matches with that in CRSP;
proc sql;
  create table missing_permno1a as
  select a.*, abs(b.prc) as crsp_price
  from missing_permno1a as a left join crsp.msf as b
  on a.permno = b.permno and intck('month', b.date, a.report_dt) = 0;
quit;

data missing_permno1a; set missing_permno1a;
  if crsp_price*.9 <= prc_p50 <= crsp_price*1.1 then included = 1; else included = 0;
  diff = abs(prc_p50-crsp_price);

proc sort; by crsp_company_key cusip ticker security_name permno permco comnam;

proc means data = missing_permno1a noprint; by crsp_company_key cusip ticker security_name permno permco comnam;
  var included;
  output out = temp (drop=_type_ _freq_) mean = included n = num_dates; 

proc sort data = temp; by crsp_company_key cusip ticker security_name included;

data missing_permno1x; set temp; by crsp_company_key cusip ticker security_name;
  if last.security_name and included >= .5;

proc sort; by crsp_company_key cusip ticker security_name permno;

proc sort data = missing_permno1; by crsp_company_key cusip ticker security_name permno;

data missing_permno1a_keep; merge missing_permno1 (in=a) missing_permno1x (in=b); by crsp_company_key cusip ticker security_name permno;
  if a and b;
run;

data missing_permno1b; merge missing_permno1 (in=a) missing_permno1x (in=b keep=crsp_company_key cusip ticker security_name permno); by crsp_company_key cusip ticker security_name permno; 
  if a and not b; 
  drop permco permno comnam namedt nameenddt;
run;

**** Match by 6-digit CUSIP if ticker is missing;
data missing_permno1ba; set missing_permno1b;
  if ticker = ''; 

proc sql;
  create table missing_permno1ba as
  select a.*, b.permno, b.permco, b.comnam, b.namedt, b.nameenddt
  from missing_permno1ba as a left join cusips as b
  on a.ticker = '' and substr(a.cusip, 1, 6) = substr(b.cusip, 1, 6) and b.namedt <= a.report_dt <= nameenddt;
quit;

**** Match by ticker if ticker is not missing;
data missing_permno1bb; set missing_permno1b;
  if ticker ne ''; 

proc sql;
  create table missing_permno1bb as
  select a.*, b.permno, b.permco, b.comnam, b.namedt, b.nameenddt
  from missing_permno1bb as a left join cusips as b
  on a.ticker = b.ticker and b.namedt <= a.report_dt <= nameenddt;
quit;

data missing_permno2; set missing_permno1ba (in=a) missing_permno1bb (in=b);
run;

**** Check if the stock price on the report date matches with that in CRSP;
data missing_permno2a; set missing_permno2; 
  if namedt ne .;

proc sql;
  create table missing_permno2a as
  select a.*, abs(b.prc) as crsp_price
  from missing_permno2a as a left join crsp.msf as b
  on a.permno = b.permno and intck('month', b.date, a.report_dt) = 0;
quit;

data missing_permno2x; set missing_permno2a;
  if crsp_price*.95 <= prc_p50 <= crsp_price*1.05 then included = 1; else included = 0;
  diff = abs(prc_p50-crsp_price);

proc sort data = missing_permno2x; by crsp_company_key cusip ticker security_name permno permco comnam namedt nameenddt;

proc means noprint; by crsp_company_key cusip ticker security_name permno permco comnam namedt nameenddt;
  var included;
  output out = temp (drop=_type_ _freq_) mean = included n = num_dates; 

proc sort; by crsp_company_key cusip ticker security_name included; 

data missing_permno2x; set temp; by crsp_company_key cusip ticker security_name included; 
  if last.security_name and included >= .8;

proc sort; by crsp_company_key cusip ticker security_name permno;

proc sort data = missing_permno; by crsp_company_key cusip ticker security_name;

data missing_permno2a_keep; merge missing_permno (in=a) missing_permno2x (in=b); by crsp_company_key cusip ticker security_name;
  if a and b;

data temp.holdings_fix; set missing_permno1a_keep missing_permno2a_keep;

proc sort nodupkey data = temp.holdings_fix; by crsp_company_key report_dt; 
run;
**** End of filling in missing permno;


proc sort data = crspmf.portnomap out = portnomap (keep=crsp_cl_grp crsp_portno begdt enddt) nodupkey; by crsp_cl_grp crsp_portno begdt enddt; 
  where crsp_cl_grp ne . and crsp_portno ne .; run;

proc sql;
  create table holdings as
  select a.*, b.crsp_cl_grp
  from crspmf_holdings (keep=crsp_portno report_dt eff_dt permco permno market_val nbr_shares percent_tna security_rank crsp_company_key) as a, portnomap as b
  where a.crsp_portno = b.crsp_portno and b.begdt <= a.report_dt <= b.enddt;
quit;

proc sql;
  create table holdings as
  select a.*, b.permno as permno2, b.permco as permco2
  from holdings as a left join temp.holdings_fix as b
  on a.crsp_company_key = b.crsp_company_key and a.report_dt = b.report_dt;
quit;

data holdings; set holdings;
  if permno = . and permno2 ne . then permno = permno2;
  if permco = . and permco2 ne . then permco = permco2;
  drop permno2 permco2;

proc sort nodupkey data = holdings; by _ALL_;
run;


*** compute TNA;
proc sort data = crspmf.portnomap out = fundnomap (keep=crsp_cl_grp crsp_fundno begdt enddt) nodupkey; by crsp_cl_grp crsp_fundno begdt enddt; 
  where crsp_cl_grp ne .;

proc sql;
  create table fundnomap as
  select a.*, b.caldt, b.mtna
  from fundnomap as a, crspmf.monthly_tna as b
  where a.crsp_fundno = b.crsp_fundno; 
quit;

data fundnomap; set fundnomap;
  if mtna < 0 then mtna = .;

proc sort nodupkey; by crsp_cl_grp caldt crsp_fundno;

proc means noprint; by crsp_cl_grp caldt;
  var mtna;
  output out = tna (drop=_type_ _freq_) sum = mtna; run;

data tna; set tna;
  mend = intnx('month', caldt, 0, 'end'); 
  format mend date9.;

proc sort data = tna; by crsp_cl_grp mend caldt; 

data tna; set tna; by crsp_cl_grp mend;
  if last.mend;
run;
*** END compute TNA;

proc sql;
  create table holdings as
  select a.*, b.shrout as shrout_rdate, abs(b.prc)*b.shrout as market_cap, b.cfacshr as cfacshr_rdate, abs(b.prc) as prc_rdate
  from holdings as a left join crsp.msf as b
  on a.permno = b.permno and year(a.report_dt) = year(b.date) and month(a.report_dt) = month(b.date);
quit; 

proc sql;
  create table holdings as
  select a.*, b.shrout as shrout_fdate, b.cfacshr as cfacshr_fdate, abs(b.prc) as prc_fdate
  from holdings as a left join crsp.msf as b
  on a.permno = b.permno and year(a.eff_dt) = year(b.date) and month(a.eff_dt) = month(b.date); 
quit; 

proc sql;
  create table holdings as
  select a.*, b.shrout as shrout_fdate2, b.cfacshr as cfacshr_fdate2, abs(b.prc) as prc_fdate2
  from holdings as a left join crsp.dsf as b
  on a.permno = b.permno and a.eff_dt = b.date;
quit; 

proc sql;
  create table holdings as
  select a.*, b.mtna
  from holdings as a left join tna as b
  on a.crsp_cl_grp = b.crsp_cl_grp and year(a.report_dt) = year(b.caldt) and month(a.report_dt) = month(b.caldt); 
quit;
 
data tempholdings; set holdings;
  r_indiactor = 0; f_indiactor = 0;
  if round(prc_rdate*10)-5 <= round(market_val/nbr_shares*10) <= round(prc_rdate*10)+5 then r_indiactor = 1;  
  if round(prc_fdate*10)-5 <= round(market_val/nbr_shares*10) <= round(prc_fdate*10)+5 then f_indiactor = 1; 

proc sort data = tempholdings; by crsp_cl_grp report_dt crsp_company_key permno market_cap shrout_rdate cfacshr_rdate security_rank nbr_shares percent_tna;

proc sort; by crsp_cl_grp report_dt;

proc means noprint; by crsp_cl_grp report_dt;
  var r_indiactor f_indiactor;
  output out = indicator (drop=_type_ _freq_) mean = r_indiactor f_indiactor; 

data tempholdings; merge tempholdings (drop= r_indiactor f_indiactor) indicator; by crsp_cl_grp report_dt;
  if f_indiactor >= .75 and r_indiactor < .25 and cfacshr_fdate not in (., 0) and cfacshr_rdate not in (., 0) and cfacshr_fdate ne cfacshr_rdate then do; 
    nbr_shares = nbr_shares/(cfacshr_rdate/cfacshr_fdate); end; 
  **************** For fund-quarters using info as of the eff_date, adjust the number of shares for stock splits ************************;
  market_value = nbr_shares*prc_rdate;
  if shrout_rdate > 0 then wt = nbr_shares/(shrout_rdate*1000);

proc sort data = tempholdings; by crsp_cl_grp report_dt permno market_cap shrout_rdate cfacshr_rdate; *;

proc means noprint data = tempholdings; by crsp_cl_grp report_dt permno market_cap shrout_rdate cfacshr_rdate;
  var market_value nbr_shares wt percent_tna;
  output out = temp.holdings (drop=_type_ _freq_) sum = market_value nbr_shares wt percent_tna;

proc sort data = temp.holdings; by crsp_cl_grp report_dt;

proc means data = temp.holdings noprint; by crsp_cl_grp report_dt;
  var market_value; where market_value > 0;
  output out = port_val (drop=_type_ _freq_) sum = port_val;

data temp.holdings; merge temp.holdings port_val; by crsp_cl_grp report_dt;
  if market_value > 0 then pwt = market_value/port_val;
run;  

proc means data = temp.holdings n mean median min p1 p5 p25 p75 p95 p99 max; class report_dt;
  var pwt percent_tna;
run;
